Basic data visualization of beatmap info provided by osu! API. https://github.com/ppy/osu-api/wiki
Only uses ranked/loved/qualified maps. Graphs focusing on standard mode.
(Because of ranked/loved irregularities, a few maps that are actually in the graveyard are still returned by the API query, e.g. https://osu.ppy.sh/b/766190&m=2, which has one loved CtB difficulty.)
library(ggplot2)
library(gridExtra)
library(plyr)
library(jsonlite)
library(varhandle)
library(chron)
library(magrittr)
# Cached chunk
# Import data from JSON and remove duplicate rows.
# NOTE(review): assumes maps.json parses to a list of data frames that share
# the same columns, so that rbind() yields a single data frame -- confirm.
beatmaps <- unique(do.call("rbind", fromJSON("maps.json")))

# Set global figure width and height
knitr::opts_chunk$set(fig.width = 10, fig.height = 6)
# Disable warnings and messages
knitr::opts_chunk$set(message = FALSE, warning = FALSE)

# Convert strings of ints and floats to numeric datatypes.
# vapply() guarantees a logical mask with one entry per column; a column is
# "numeric" only when it parses as a number but is not purely integer.
beatmaps.isintcol <- vapply(
  beatmaps,
  function(col) all(check.numeric(col, only.integer = TRUE)),
  logical(1)
)
beatmaps.isnumcol <- vapply(
  beatmaps,
  function(col) all(check.numeric(col)),
  logical(1)
) & !beatmaps.isintcol
# Convert column by column with lapply() so the result stays a list of
# columns (no intermediate matrix, no vector collapse for a single column).
beatmaps[beatmaps.isintcol] <- lapply(beatmaps[beatmaps.isintcol], as.integer)
beatmaps[beatmaps.isnumcol] <- lapply(beatmaps[beatmaps.isnumcol], as.numeric)

# Convert MySQL datetimes to R datetimes. As far as the new site goes, this
# appears to be UTC-4 ("Etc/GMT+4" uses the POSIX sign convention, i.e. four
# hours behind UTC), but this may be a local thing. Probably broken.
beatmaps$approved_date <- as.POSIXct(beatmaps$approved_date, tz = "Etc/GMT+4")
beatmaps$last_update <- as.POSIXct(beatmaps$last_update, tz = "Etc/GMT+4")

# Create labels and data frames for each gamemode
gamemodes <- c("std", "taiko", "ctb", "mania")
gamemode.labels <- c("Standard", "Taiko", "CtB", "Mania")
# Explicit levels tie each label to its numeric mode code (0-3) instead of
# relying on the sorted unique values, which fails when a mode is absent.
beatmaps$mode <- factor(
  beatmaps$mode,
  levels = seq_along(gamemode.labels) - 1L,
  labels = gamemode.labels
)
# Creates the global data frames `std`, `taiko`, `ctb` and `mania`.
# which() keeps rows with a missing mode from turning into all-NA rows.
for (i in seq_along(gamemodes)) {
  assign(gamemodes[i], beatmaps[which(beatmaps$mode == gamemode.labels[i]), ])
}
# Reusable ggplot2 components shared by the plots in this report.
# The fixed axis limits will usually leave a few outlier maps out.
approved_x_scale <- scale_x_datetime(date_labels = "%Y", date_breaks = "1 year")
SR_y_scale <- scale_y_continuous(breaks = seq(0, 10, 1), limits = c(0, 10))
AR_y_scale <- scale_y_continuous(breaks = seq(0, 10))

# Legend titles for plots that map mode to colour or fill
legend_title_color <- labs(color = "Mode")
legend_title_fill <- labs(fill = "Mode")

# Horizontally centre every plot title
theme_update(plot.title = element_text(hjust = 0.5))
# Shared pieces for the star-rating plots: 0.05-star bins on a 0-10 star axis
# with a tick every half star
diff_hist <- geom_histogram(binwidth = 0.05)
diff_x_scale <- scale_x_continuous(breaks = seq(0, 10, 0.5), limits = c(0, 10))

# Histogram of star rating (all modes), bars filled by mode
ggplot(beatmaps, aes(x = difficultyrating, fill = as.factor(mode))) +
  diff_hist +
  diff_x_scale +
  legend_title_fill +
  ggtitle("Total Star Rating (All Modes)")

# Frequency polygon of SR (all modes), one line per mode
ggplot(beatmaps, aes(x = difficultyrating, color = as.factor(mode))) +
  geom_freqpoly(binwidth = 0.05) +
  diff_x_scale +
  legend_title_color +
  ggtitle("Star Rating (All Modes)")

# Histograms of SR (all modes) with separate y scales: one plot per mode,
# laid out together under a common heading
diffplots <- lapply(seq_along(gamemode.labels), function(mode_index) {
  mode_maps <- beatmaps[as.numeric(beatmaps$mode) == mode_index, ]
  ggplot(mode_maps, aes(x = difficultyrating)) +
    diff_hist +
    diff_x_scale +
    ggtitle(gamemode.labels[mode_index])
})
grid.arrange(grobs = diffplots, top = "Star Rating Distributions (All Modes)")
Until recently, the ranking criteria required non-marathon maps to include a difficulty of Normal or below. Under the new criteria, any map with a drain time under 3:30 requires a Normal or below. This explains the large number of maps between 1* and 2*.
After these Easy and Normal maps, the most popular standard maps are between 3* and 4*. There is a small but noticeable spike in standard maps at about 5.25*.
Taiko stands out for having the most unimodal-looking distribution.
# Shared pieces for the length plots: 1-second bins on a 0-600 second axis
# with a tick every 30 seconds, and vertical tick labels for crowded panels
length_hist <- geom_histogram(binwidth = 1)
length_x_scale <- scale_x_continuous(breaks = seq(0, 600, 30), limits = c(0, 600))
x_labels_90 <- theme(axis.text.x = element_text(hjust = 1, angle = 90))

# Histogram of total length (all modes), bars filled by mode
ggplot(beatmaps, aes(x = total_length, fill = as.factor(mode))) +
  length_hist +
  length_x_scale +
  legend_title_fill +
  ggtitle("Total Beatmap Length (All Modes)")

# Frequency polygon of total length (all modes), one line per mode
ggplot(beatmaps, aes(x = total_length, color = as.factor(mode))) +
  geom_freqpoly(binwidth = 1) +
  length_x_scale +
  legend_title_color +
  ggtitle("Beatmap Length (All Modes)")

# Histograms of total length (all modes): one plot per mode, laid out together
lengthplots <- lapply(seq_along(gamemode.labels), function(mode_index) {
  mode_maps <- beatmaps[as.numeric(beatmaps$mode) == mode_index, ]
  ggplot(mode_maps, aes(x = total_length)) +
    length_hist +
    length_x_scale +
    x_labels_90 +
    ggtitle(gamemode.labels[mode_index])
})
grid.arrange(grobs = lengthplots, top = "Beatmap Length Distributions (All Modes)")
The massive spike in maps 85-90 seconds long corresponds to the ever-popular TV Size map genre.
This occurs in every gamemode, though mania and taiko have relatively large numbers of two minute maps.
# Frequency polygons of playcount (all modes): one line per mode, counted in
# bins of 5000 plays and limited to maps with at most one million plays
ggplot(beatmaps, aes(x = playcount, color = as.factor(mode))) +
  geom_freqpoly(binwidth = 5000) +
  scale_x_continuous(limits = c(0, 1000000)) +
  legend_title_color +
  ggtitle("Playcount (All Modes)")
# Bucket every map by the calendar month in which it was approved
beatmaps$month <- cut(beatmaps$approved_date, breaks = "month")
# Only label the month buckets that coincide with the start of a year;
# the labels are full dates, hence the rotated tick text below
year_x_scale <- scale_x_discrete(breaks = unique(cut(beatmaps$approved_date, breaks = "year")))
x_labels_45 <- theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Bar chart of date approved (all modes)
# legend_title_fill keeps the legend title ("Mode") consistent with the other
# per-mode plots instead of showing the raw column name
ggplot(beatmaps, aes(x = month, fill = mode)) +
  ggtitle("Date Approved (All Modes)") +
  legend_title_fill +
  geom_bar(width = 1) +
  year_x_scale + x_labels_45

# Frequency polygon of date approved (all modes)
# group = mode is required because x is discrete; stat = "count" draws one
# point per month bucket
ggplot(beatmaps, aes(x = month, group = mode, color = mode)) +
  ggtitle("Date Approved (All Modes)") +
  legend_title_color +
  geom_freqpoly(stat = "count") +
  year_x_scale + x_labels_45
library(knitr)
# Render a table of the most frequent values in a vector.
#
# arr: vector of values to tabulate (missing values are not counted by table()).
# lab: header for the value column; the count column is always "Freq".
# n:   number of rows to keep, most frequent first (default 20).
#
# Returns the result of kable() for the top `n` values.
most.frequent.kable <- function(arr, lab, n = 20) {
  freq <- sort(table(arr), decreasing = TRUE)
  # head() simply returns everything when there are fewer than n distinct values
  kable(head(freq, n), col.names = c(lab, "Freq"))
}
# Most frequent artists across all difficulties
most.frequent.kable(beatmaps$artist, "Artist")
| Artist | Freq |
|---|---|
| Hatsune Miku | 653 |
| ClariS | 421 |
| KOTOKO | 398 |
| fripSide | 362 |
| xi | 349 |
| senya | 339 |
| IOSYS | 331 |
| Various Artists | 329 |
| LiSA | 323 |
| yanaginagi | 311 |
| Camellia | 309 |
| ZUN | 272 |
| Duca | 257 |
| Rita | 255 |
| M2U | 245 |
| Chata | 226 |
| u’s | 225 |
| Hanatan | 224 |
| nano | 224 |
| Suzuki Konomi | 211 |
# Most frequent song titles across all difficulties
most.frequent.kable(beatmaps$title, "Title")
| Title | Freq |
|---|---|
| Piano 7K BMS Pack | 193 |
| Piano Beatmap Set | 114 |
| Harumachi Clover | 70 |
| Granat | 65 |
| Ai no Scenario | 62 |
| PEPPY FIX TAIKO STAR RATING PLEASE for a happier | 61 |
| Tokyo (Innovaderz Remix) | 61 |
| MIIRO | 58 |
| Hitorigoto -TV MIX- | 55 |
| Re:TrymenT | 54 |
| Haru Modoki | 52 |
| Natsukoi Hanabi | 52 |
| TSLove | 52 |
| Contrail Kiseki | 51 |
| Paradisus-Paradoxum | 51 |
| Untan Goose | 51 |
| Uso no Hibana | 50 |
| Six Trillion Years and Overnight Story | 49 |
| Gabriel Drop Kick | 43 |
| Oriental Blossom | 42 |
# Most frequent sources across all difficulties
most.frequent.kable(beatmaps$source, "Source")
| Source | Freq |
|---|---|
| | 24652 |
| Touhou | 2370 |
| BMS | 1557 |
| 東方Project | 754 |
| SOUND VOLTEX III GRAVITY WARS | 466 |
| DJMAX | 455 |
| beatmania IIDX | 398 |
| SOUND VOLTEX II -infinite infection- | 390 |
| osu! | 258 |
| Vocaloid | 242 |
| Taiko no Tatsujin | 222 |
| Deemo | 202 |
| jubeat | 187 |
| Nico Nico Douga | 178 |
| Love Live! School idol project | 174 |
| SOUND VOLTEX BOOTH | 169 |
| REFLEC BEAT groovin’!! | 166 |
| Cytus | 164 |
| K-ON!! | 153 |
| 艦隊これくしょん -艦これ- | 146 |
# Most frequent creators across all difficulties
most.frequent.kable(beatmaps$creator, "Creator")
| Creator | Freq |
|---|---|
| osuplayer111 | 572 |
| Sotarks | 571 |
| DJPop | 563 |
| tutuhaha | 387 |
| ztrot | 377 |
| Larto | 354 |
| Natsu | 345 |
| Monstrata | 331 |
| Ascendance | 311 |
| pishifat | 311 |
| Lasse | 310 |
| Gero | 304 |
| wcx19911123 | 293 |
| Milan- | 279 |
| ouranhshc | 262 |
| alacat | 248 |
| NatsumeRin | 247 |
| MoonFragrance | 234 |
| James | 232 |
| Fycho | 221 |
# Top 50 beatmap sets by favourite count: keep the first row seen for each
# beatmapset_id, sort descending, then show only the identifying columns
beatmaps[!duplicated(beatmaps$beatmapset_id), ] %>%
  arrange(desc(favourite_count)) %>%
  head(n = 50) %>%
  subset(select = c(creator, artist, title, favourite_count)) %>%
  kable()
| creator | artist | title | favourite_count |
|---|---|---|---|
| W h i t e | Kuba Oms | My Love | 13163 |
| Fort | Panda Eyes & Teminite | Highscore | 10292 |
| jonathanlfj | cYsmix feat. Emmy | Tear Rain | 9763 |
| Charles445 | Rostik | Liquid (Paul Rosenthal Remix) | 8282 |
| VINXIS | Reol | No title | 7604 |
| Ekoro | UNDEAD CORPORATION | Everything will freeze | 6886 |
| Kuria | Linked Horizon | Guren no Yumiya (TV Size) | 6880 |
| Doormat | ClariS | Hitorigoto -TV MIX- | 6606 |
| Awaken | Konuko | Toumei Elegy | 6534 |
| Voltaeyx | TheFatRat | Mayday (feat. Laura Brehm) | 6335 |
| Takuya | S3RL | Pika Girl | 5510 |
| Bearizm | Station Earth | Cold Green Eyes ft. Roos Denayer | 5425 |
| Saten-san | Yousei Teikoku | Kokou no Sousei | 5300 |
| gowww | Hatsune Miku & Megpoid Gumi | MATRYOSHKA | 4878 |
| h3k1ru | Yiruma & Skullee | River Flows In You (A Love Note) | 4849 |
| ouranhshc | Masayoshi Minoshima feat. nomico | Bad Apple!! | 4829 |
| -kevincela- | Rameses B | Flaklypa | 4829 |
| ktgster | Chasers | Lost | 4770 |
| Sekai-nyan | Suzuki Konomi | This game (TV Size) | 4618 |
| Kuria | ONE OK ROCK | Answer is Near | 4591 |
| eLy | Feint | Tower Of Heaven (You Are Slaves) | 4461 |
| Secretpipe | S3RL | Bass Slut (Original Mix) | 4399 |
| Monstrata | RADWIMPS | Zen Zen Zense (movie ver.) | 4352 |
| Natsu | Hanatan | Airman ga Taosenai (SOUND HOLIC Ver.) | 4328 |
| NatsumeRin | Hatsune Miku | Senbonzakura (Short Ver.) | 3966 |
| kristi71111 | TK from Ling tosite sigure | unravel (TV edit) | 3942 |
| Rue | DJ Genericname | Dear You | 3886 |
| Monstrata | Porter Robinson & Madeon | Shelter | 3860 |
| Asphyxia | xi | Blue Zenith | 3849 |
| Kagetsu | KANA-BOON | Silhouette | 3839 |
| osuplayer111 | Getter Jaani | Rockefeller Street (Nightcore Mix) | 3703 |
| Smoothie | UNDEAD CORPORATION | Yoru Naku Usagi wa Yume o Miru | 3677 |
| Star Stream | Wotamin | Gigantic O.T.N | 3627 |
| Kyshiro | toby fox | MEGALOVANIA | 3613 |
| Blue Dragon | The Quick Brown Fox | The Big Black | 3403 |
| RLC | Himeringo | Yotsuya-san ni Yoroshiku | 3377 |
| Multiple Creators | Soleily | Renatus | 3345 |
| Sherry | Nanahira | Frightfully-insane Flan-chan’s frightful song | 3327 |
| jonathanlfj | Reol | Plus Danshi ver Reol | 3324 |
| Len | Shawn Wasabi | Marble Soda | 3288 |
| Ephemeral | Masayoshi Minoshima ft. nomico | Bad Apple!! | 3219 |
| Tarrasky | Agnete Kjolsrud | Get Jinxed | 3165 |
| Guy | Aoi Eir | IGNITE (TV size ver.) | 3119 |
| AllStar12 | yuikonnu & ayaponzu* | Super Nuko World | 3033 |
| Garven | Saiya | Remote Control | 3028 |
| handsome | Reol | MONSTER | 3009 |
| Gaia | Reol | Asymmetry | 2993 |
| rui | Hatsune Miku | Rubik’s Cube | 2967 |
| Monstrata | 9mm Parabellum Bullet | Inferno | 2959 |
| Jacob | NOMA | Brain Power | 2908 |
# Top 50 individual difficulties by playcount, with the difficulty name
# (version) shown alongside the set information
beatmaps %>%
  arrange(desc(playcount)) %>%
  head(n = 50) %>%
  subset(select = c(creator, artist, title, version, playcount)) %>%
  kable()
| creator | artist | title | version | playcount |
|---|---|---|---|---|
| W h i t e | Kuba Oms | My Love | Hard | 21429513 |
| W h i t e | Kuba Oms | My Love | Normal | 20405157 |
| jonathanlfj | cYsmix feat. Emmy | Tear Rain | Normal | 18188427 |
| Blue Dragon | The Quick Brown Fox | The Big Black | WHO’S AFRAID OF THE BIG BLACK | 14209863 |
| jonathanlfj | cYsmix feat. Emmy | Tear Rain | Hard | 12840626 |
| ktgster | Chasers | Lost | Normal | 12456963 |
| -kevincela- | Rameses B | Flaklypa | Normal | 12124613 |
| Charles445 | Rostik | Liquid (Paul Rosenthal Remix) | Easy | 10881187 |
| Blue Dragon | Team Nekokan | Can’t Defeat Airman | Holy Shit! It’s Airman!! | 10878775 |
| -kevincela- | Rameses B | Flaklypa | Hard | 10611576 |
| W h i t e | Kuba Oms | My Love | Insane | 9287944 |
| Multiple Creators | Soleily | Renatus | Normal | 8065171 |
| jonathanlfj | cYsmix feat. Emmy | Tear Rain | Insane | 8028482 |
| ktgster | Chasers | Lost | Hard | 7715600 |
| Rue | DJ Genericname | Dear You | Dear Rue | 7548246 |
| Bearizm | Station Earth | Cold Green Eyes ft. Roos Denayer | Divine | 7343941 |
| h3k1ru | Yiruma & Skullee | River Flows In You (A Love Note) | Love Note | 7328010 |
| VINXIS | Reol | No title | Light Insane | 6820213 |
| Charles445 | Rostik | Liquid (Paul Rosenthal Remix) | Normal | 6818829 |
| val0108 | Lily | Scarlet Rose | 0108 style | 6684374 |
| Charles445 | Rostik | Liquid (Paul Rosenthal Remix) | Hard | 6659517 |
| Ekoro | UNDEAD CORPORATION | Everything will freeze | Insane | 6604128 |
| Reikin | Nico Nico Douga | U.N. Owen Was Her? | Normal | 6495851 |
| Fort | Panda Eyes & Teminite | Highscore | Another | 6442871 |
| Fort | Panda Eyes & Teminite | Highscore | LGV’s Insane | 6255646 |
| Lust | Tsunamaru | Daidai Genome | Insane | 6182514 |
| Takuya | S3RL | Pika Girl | Hard | 5910259 |
| Saten-san | Yousei Teikoku | Kokou no Sousei | Hard | 5760621 |
| Nakagawa-Kanon | xi | FREEDOM DiVE | Another | 5520280 |
| Kuria | Linked Horizon | Guren no Yumiya (TV Size) | DS’s Hard | 5517593 |
| Flask | Fujijo Seitokai Shikkou-bu | Best FriendS -TV Size- | Fycho’s Insane | 5498716 |
| VINXIS | Reol | No title | byfaR’s Hard | 5419243 |
| Garven | Saiya | Remote Control | Insane | 5395495 |
| xxdeathx | FLOWxGRANRODEO | 7 -seven- -TV SIZE - | Expert | 5357990 |
| Damnae | raja | the light | Normal | 5289914 |
| Kuria | Linked Horizon | Guren no Yumiya (TV Size) | alacat’s Normal | 5244344 |
| Fort | Panda Eyes & Teminite | Highscore | Hyper | 5227173 |
| Doormat | ClariS | Hitorigoto -TV MIX- | Insane | 5173748 |
| Bearizm | Station Earth | Cold Green Eyes ft. Roos Denayer | apple’s Insane | 5172218 |
| galvenize | DJ Fresh | Gold Dust | Insane | 5150833 |
| Taeyang | kradness&Reol | Remote Control | Max Control! | 5141365 |
| Luerxa | Primastella | Koigokoro | Delis’ Insane | 5138932 |
| JauiPlaY | DJ Okawari | Flower Dance | Flower | 5138709 |
| Multiple Creators | Soleily | Renatus | Hard | 5085398 |
| val0108 | Hatsune Miku | Mythologia’s End | Myth0108ia | 5076064 |
| Takuya | S3RL | Pika Girl | Insane | 5024386 |
| Natsu | Hanatan | Airman ga Taosenai (SOUND HOLIC Ver.) | Insane | 4916733 |
| Star Stream | Sagara Kokoro | Hoshizora no Ima | S.S | 4879941 |
| Nakagawa-Kanon | xi | FREEDOM DiVE | FOUR DIMENSIONS | 4753427 |
| Kuria | Linked Horizon | Guren no Yumiya (TV Size) | Insane | 4679371 |
# Scatterplot of AR vs BPM (standard mode, BPM axis limited to 0-500)
ggplot(std, aes(x = bpm, y = diff_approach)) +
  geom_point(alpha = 0.1) +
  scale_x_continuous(limits = c(0, 500)) +
  AR_y_scale +
  ggtitle("Approach Rate vs BPM")

# Scatterplot of SR vs total length time
ggplot(std, aes(x = total_length, y = difficultyrating)) +
  geom_point(alpha = 0.1) +
  length_x_scale +
  SR_y_scale +
  ggtitle("Star Rating vs Total Length")

# Scatterplot of max combo vs drain time (combo axis limited to 0-4000)
ggplot(std, aes(x = hit_length, y = max_combo)) +
  geom_point(alpha = 0.05) +
  length_x_scale +
  scale_y_continuous(limits = c(0, 4000)) +
  ggtitle("Max Combo vs Drain Time")

# Linear fit of max combo on drain time; a high linear correlation is expected
summary(lm(formula = max_combo ~ hit_length, data = std))
##
## Call:
## lm(formula = max_combo ~ hit_length, data = std)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3860.7 -134.3 -17.4 120.5 23020.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -99.20894 2.51272 -39.48 <2e-16 ***
## hit_length 4.89613 0.01715 285.53 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 272.8 on 56923 degrees of freedom
## (4 observations deleted due to missingness)
## Multiple R-squared: 0.5888, Adjusted R-squared: 0.5888
## F-statistic: 8.152e+04 on 1 and 56923 DF, p-value: < 2.2e-16
# Scatterplot of favorite count vs playcount
# (limited to 1,000,000 plays and 1,000 favourites)
ggplot(std, aes(x = playcount, y = favourite_count)) +
  geom_point(alpha = 0.05) +
  scale_x_continuous(limits = c(0, 1000000)) +
  scale_y_continuous(limits = c(0, 1000)) +
  ggtitle("Favorite Count vs Playcount")

# Scatterplot of playcount vs total length
ggplot(std, aes(x = total_length, y = playcount)) +
  geom_point(alpha = 0.1) +
  length_x_scale +
  scale_y_continuous(limits = c(0, 1000000)) +
  ggtitle("Playcount vs Total Length")

# Scatterplot of AR vs date approved
ggplot(std, aes(x = approved_date, y = diff_approach)) +
  geom_point(alpha = 0.05) +
  approved_x_scale +
  AR_y_scale +
  ggtitle("Approach Rate vs Date Approved")

# Scatterplot of SR vs date approved
ggplot(std, aes(x = approved_date, y = difficultyrating)) +
  geom_point(alpha = 0.1) +
  approved_x_scale +
  SR_y_scale +
  ggtitle("Star Rating vs Date Approved")
# Playcount by song time, categorized by spread icon
# https://osu.ppy.sh/help/wiki/Difficulties#star-rating Not sure about values between boundaries
# Lower star-rating bound of each tier; the last tier ("Expert+") is open-ended.
spread.sr <- c(0, 1.51, 2.26, 3.76, 5.26, 6.76)
spread.names <- c("Easy", "Normal", "Hard", "Insane", "Expert", "Expert+")
spread.colors <- c("olivedrab3", "paleturquoise", "gold", "hotpink", "purple", "darkgray")

# Assign difficulty rating by spread ranges to spread names.
# cut() needs one more break than there are tiers: without the trailing Inf
# only five intervals exist and every map at 6.76 stars or above becomes NA,
# leaving "Expert+" permanently empty.
beatmaps$spread_name <- spread.names[
  cut(beatmaps$difficultyrating, c(spread.sr, Inf), right = FALSE, labels = FALSE)
]
# which() avoids the all-NA rows that logical indexing creates for NA values
std <- beatmaps[which(beatmaps$mode == "Standard"), ] # update std

# 30-second hit-length bins from 0 to 360 seconds
hitlength.bins <- seq(0, 360, 30)
# 2 x 3 grid of base-graphics panels, one per spread tier
par(mfrow = c(2, 3), mar = c(4, 4, 4, 1), cex.main = 2)
for (i in seq_along(spread.names)) {
  std.spread <- std[which(std$spread_name == spread.names[i]), ]
  length.bin <- cut(std.spread$hit_length, hitlength.bins)
  # Sum as double: playcount is stored as integer and per-bin totals can
  # exceed the integer range, which would silently produce NA bars.
  playcount.bin.sum <- vapply(
    split(as.numeric(std.spread$playcount), length.bin),
    sum,
    numeric(1),
    na.rm = TRUE
  )
  barplot(playcount.bin.sum, space = 0, width = 30, xlab = "Hit length (s)",
          ylab = "Playcount Total", main = spread.names[i],
          col = spread.colors[i], axisnames = FALSE)
  # Bars are 30 wide with no gaps, so the bin edges line up with the x axis
  axis(1, at = hitlength.bins)
}
# Same but 150+ hitlength and stacked bars
hitlength.bins.150 <- seq(150, 360, 30)
# One row per spread tier, one column per 30-second bin (named by the bin's
# lower edge); every cell is filled in by the loop below
playcount.bin.mat <- matrix(
  0,
  nrow = length(spread.names),
  ncol = length(hitlength.bins.150) - 1,
  dimnames = list(spread.names, as.character(head(hitlength.bins.150, -1)))
)
for (i in seq_len(nrow(playcount.bin.mat))) {
  # which() avoids the all-NA rows that logical indexing creates for NA values
  std.spread <- std[which(std$spread_name == spread.names[i]), ]
  length.bin <- cut(std.spread$hit_length, hitlength.bins.150)
  # Sum as double: playcount is stored as integer and per-bin totals can
  # exceed the integer range, which would silently produce NA cells.
  playcount.bin.mat[i, ] <- vapply(
    split(as.numeric(std.spread$playcount), length.bin),
    sum,
    numeric(1),
    na.rm = TRUE
  )
}
dev.off() # Reset par
## null device
## 1
# Stacked bars: one bar per 30-second bin, one coloured segment per spread tier
barplot(
  playcount.bin.mat,
  main = "Total Playcount by Hitlength and Difficulty",
  xlab = "Hitlength (s)",
  ylab = "Total Playcount",
  col = spread.colors,
  legend.text = spread.names,
  width = 30,
  space = 0,
  axisnames = FALSE
)
# The bars start at x = 0, so shift the tick positions by the first bin edge
# while labelling them with the real hit lengths
axis(side = 1, at = hitlength.bins.150 - hitlength.bins.150[1], labels = hitlength.bins.150)